From 1f5b281382b8511481bdd1ac0baef417a32779fb Mon Sep 17 00:00:00 2001
From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= <pippin@gimp.org>
Date: Sat, 31 Mar 2012 14:58:49 +0100
Subject: [PATCH] extensions: add an extension with some float optimizations

Alos improve some alpha unpremultiplications in other extensions.
---
 extensions/Makefile.am   |   4 +
 extensions/float.c       | 324 +++++++++++++++++++++++++++++++++++++++
 extensions/gegl-fixups.c |   6 +-
 extensions/gggl.c        |   3 +-
 4 files changed, 333 insertions(+), 4 deletions(-)
 create mode 100644 extensions/float.c

diff --git a/extensions/Makefile.am b/extensions/Makefile.am
index fac0123..c017046 100644
--- a/extensions/Makefile.am
+++ b/extensions/Makefile.am
@@ -6,6 +6,7 @@ endif
 
 noinst_HEADERS = util.h
 
+
 AM_CPPFLAGS = \
 	-I$(top_builddir)		\
 	-I$(top_srcdir)			\
@@ -20,6 +21,7 @@ ext_LTLIBRARIES = \
 	gggl-lies.la    \
 	gggl.la         \
 	gimp-8bit.la    \
+	float.la    \
 	naive-CMYK.la   \
 	sse-fixups.la
 
@@ -31,7 +33,9 @@ gggl_la_SOURCES = gggl.c
 gimp_8bit_la_SOURCES = gimp-8bit.c
 naive_CMYK_la_SOURCES = naive-CMYK.c
 sse_fixups_la_SOURCES = sse-fixups.c
+float_la_SOURCES = float.c
 
 LIBS = $(top_builddir)/babl/libbabl-@BABL_API_VERSION@.la $(MATH_LIB)
 
 sse_fixups_la_CFLAGS = $(MMX_EXTRA_CFLAGS) $(SSE_EXTRA_CFLAGS)
+float_la_CFLAGS = -std=c99
diff --git a/extensions/float.c b/extensions/float.c
new file mode 100644
index 0000000..4fc7f98
--- /dev/null
+++ b/extensions/float.c
@@ -0,0 +1,324 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2012, Ãyvind KolÃ¥s
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "babl.h"
+#include "babl-cpuaccel.h"
+#include "extensions/util.h"
+#include "base/util.h"
+
+#define INLINE inline
+
+
+/*
+   optimized powf from:
+
+http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent
+
+   by David Hammen
+*/
+
+// Returns x^(5/12) for x in [1,2), to within 3e-8 (relative error).
+// Want more precision? Add more Chebychev polynomial coefs.
+static INLINE double pow512norm (
+   double x)
+{
+   static const int N = 8;
+
+   // Chebychev polynomial terms.
+   // Non-zero terms calculated via
+   //   integrate (2/pi)*ChebyshevT[n,u]/sqrt(1-u^2)*((u+3)/2)^(5/12)
+   //   from -1 to 1
+   // Zeroth term is similar except it uses 1/pi rather than 2/pi.
+   static const double Cn[8] = { 
+       1.1758200232996901923,
+       0.16665763094889061230,
+      -0.0083154894939042125035,
+       0.00075187976780420279038,
+      // Wolfram alpha doesn't want to compute the remaining terms
+      // to more precision (it times out).
+      -0.0000832402,
+       0.0000102292,
+      -1.3401e-6,
+       1.83334e-7};
+
+   double Tn[N];
+
+   double u = 2.0*x - 3.0;
+   int i;
+   double y = 0.0;
+
+   Tn[0] = 1.0;
+   Tn[1] = u;
+   for (i = 2; i < N; ++i) {
+      Tn[i] = 2*u*Tn[i-1] - Tn[i-2];
+   }   
+
+   for (i = N-1; i >= 0; --i) {
+      y += Cn[i]*Tn[i];
+   }   
+
+   return y;
+}
+
+// Returns x^(5/12) to within 3e-8 (relative error).
+static INLINE double pow512 (
+   double x)
+{
+   static const double pow2_512[12] = {
+      1.0,
+      pow(2.0, 5.0/12.0),
+      pow(4.0, 5.0/12.0),
+      pow(8.0, 5.0/12.0),
+      pow(16.0, 5.0/12.0),
+      pow(32.0, 5.0/12.0),
+      pow(64.0, 5.0/12.0),
+      pow(128.0, 5.0/12.0),
+      pow(256.0, 5.0/12.0),
+      pow(512.0, 5.0/12.0),
+      pow(1024.0, 5.0/12.0),
+      pow(2048.0, 5.0/12.0)
+   };
+
+   double s;
+   int iexp;
+
+   s = frexp (x, &iexp);
+   s *= 2.0;
+   iexp -= 1;
+
+   div_t qr = div (iexp, 12);
+   if (qr.rem < 0) {
+      qr.quot -= 1;
+      qr.rem += 12;
+   }
+
+   return ldexp (pow512norm(s)*pow2_512[qr.rem], 5*qr.quot);
+}
+
+static inline double
+flinear_to_gamma_2_2 (double value)
+{
+  if (value > 0.0030402477F)
+    return 1.055F * pow512 (value) - 0.055F;
+  return 12.92F * value;
+}
+
+static INLINE long
+conv_rgbaF_linear_rgbAF_gamma (unsigned char *src, 
+                               unsigned char *dst, 
+                               long           samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   int n = samples;
+
+   while (n--)
+     {
+       float alpha = fsrc[3];
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha;
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha;
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha;
+       *fdst++ = *fsrc++;
+     }
+  return samples;
+}
+
+static INLINE long
+conv_rgbAF_linear_rgbAF_gamma (unsigned char *src, 
+                               unsigned char *dst, 
+                               long           samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   int n = samples;
+
+   while (n--)
+     {
+       float alpha = fsrc[3];
+       if (alpha < BABL_ALPHA_THRESHOLD)
+         {
+           *fdst++ = 0.0;
+           *fdst++ = 0.0;
+           *fdst++ = 0.0;
+           *fdst++ = 0.0;
+           fsrc+=4;
+         }
+       else if (alpha >= 1.0)
+         {
+           *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+           *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+           *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+           *fdst++ = *fsrc++;
+         }
+       else
+         {
+           float alpha_recip = 1.0 / alpha;
+           *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+           *fdst++ = *fsrc++;
+         }
+     }
+  return samples;
+}
+
+static INLINE long
+conv_rgbaF_linear_rgbaF_gamma (unsigned char *src, 
+                               unsigned char *dst, 
+                               long           samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   int n = samples;
+
+   while (n--)
+     {
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = *fsrc++;
+     }
+  return samples;
+}
+
+static INLINE long
+conv_rgbF_linear_rgbF_gamma (unsigned char *src, 
+                             unsigned char *dst, 
+                             long           samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   int n = samples;
+
+   while (n--)
+     {
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+       *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+     }
+  return samples;
+}
+
+
+static INLINE long
+conv_rgbaF_gamma_rgbaF_linear (unsigned char *src, 
+                               unsigned char *dst, 
+                               long           samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   int n = samples;
+
+   while (n--)
+     {
+       *fdst++ = gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = *fsrc++;
+     }
+  return samples;
+}
+
+static INLINE long
+conv_rgbF_gamma_rgbF_linear (unsigned char *src, 
+                             unsigned char *dst, 
+                             long           samples)
+{
+   float *fsrc = (float *) src;
+   float *fdst = (float *) dst;
+   int n = samples;
+
+   while (n--)
+     {
+       *fdst++ = gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = gamma_2_2_to_linear (*fsrc++);
+       *fdst++ = gamma_2_2_to_linear (*fsrc++);
+     }
+  return samples;
+}
+
+#define o(src, dst) \
+  babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
+
+int init (void);
+
+int
+init (void)
+{
+  const Babl *rgbaF_linear = babl_format_new (
+    babl_model ("RGBA"),
+    babl_type ("float"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    babl_component ("A"),
+    NULL);
+  const Babl *rgbAF_linear = babl_format_new (
+    babl_model ("RaGaBaA"),
+    babl_type ("float"),
+    babl_component ("Ra"),
+    babl_component ("Ga"),
+    babl_component ("Ba"),
+    babl_component ("A"),
+    NULL);
+  const Babl *rgbaF_gamma = babl_format_new (
+    babl_model ("R'G'B'A"),
+    babl_type ("float"),
+    babl_component ("R'"),
+    babl_component ("G'"),
+    babl_component ("B'"),
+    babl_component ("A"),
+    NULL);
+  const Babl *rgbAF_gamma = babl_format_new (
+    babl_model ("R'aG'aB'aA"),
+    babl_type ("float"),
+    babl_component ("R'a"),
+    babl_component ("G'a"),
+    babl_component ("B'a"),
+    babl_component ("A"),
+    NULL);
+  const Babl *rgbF_linear = babl_format_new (
+    babl_model ("RGB"),
+    babl_type ("float"),
+    babl_component ("R"),
+    babl_component ("G"),
+    babl_component ("B"),
+    NULL);
+  const Babl *rgbF_gamma = babl_format_new (
+    babl_model ("R'G'B'"),
+    babl_type ("float"),
+    babl_component ("R'"),
+    babl_component ("G'"),
+    babl_component ("B'"),
+    NULL);
+
+  o (rgbAF_linear, rgbAF_gamma);
+  o (rgbaF_linear, rgbAF_gamma);
+  o (rgbaF_linear, rgbaF_gamma);
+  o (rgbaF_gamma,  rgbaF_linear);
+  o (rgbF_linear, rgbF_gamma);
+  o (rgbF_gamma,  rgbF_linear);
+
+  return 0;
+}
+
diff --git a/extensions/gegl-fixups.c b/extensions/gegl-fixups.c
index 3fb6977..7f3b8ff 100644
--- a/extensions/gegl-fixups.c
+++ b/extensions/gegl-fixups.c
@@ -473,9 +473,11 @@ conv_rgbAF_rgbaF (unsigned char *srcc,
   while (n--)
     {
       float alpha = src[3];
-      float recip = 1.0/alpha;
+      float recip;
       if (alpha < BABL_ALPHA_THRESHOLD)
         recip = 0.0;
+      else
+        recip = 1.0/alpha;
       dst[0] = src[0] * recip;
       dst[1] = src[1] * recip;
       dst[2] = src[2] * recip;
@@ -518,8 +520,6 @@ conv_rgbAF_lrgba8 (unsigned char *srcc,
   return samples;
 }
 
-
-
 #define conv_rgb8_rgbAF    conv_rgb8_rgbaF
 
 int init (void);
diff --git a/extensions/gggl.c b/extensions/gggl.c
index 16d3276..6a007c6 100644
--- a/extensions/gggl.c
+++ b/extensions/gggl.c
@@ -610,9 +610,10 @@ conv_rgbAF_rgbaF (unsigned char *src, unsigned char *dst, long samples)
         }
       else
         {
+          float alpha_recip = 1.0 / alpha;
           for (c = 0; c < 3; c++)
             {
-              *(float *) dst = ((*(float *) src) / alpha);
+              *(float *) dst = ((*(float *) src) * alpha_recip);
               dst           += 4;
               src           += 4;
             }
-- 
2.30.2